Kapitel 6.7: Deutungen¶
Das Notebook ergänzt Kapitel 6.7 'Deutungen'.
Import¶
In [1]:
import pandas as pd
import numpy as np
from itertools import combinations
from resources_statistics import *
from resources_geschichtslyrik import *
import plotly.express as px
import plotly.graph_objects as go
from plotly.validators.scatter.marker import SymbolValidator
from tqdm.notebook import tqdm
In [2]:
# Load the metadata table from the JSON file in the sibling `resources` folder (relative path).
meta = pd.read_json(r"../resources/meta.json")
Korpora¶
Korpora erstellen¶
In [3]:
# Anthology corpus: rows of the 'anth' corpus dated 1850-1918 that are flagged
# as Geschichtslyrik, keeping one row per `author_title`.
meta_anth = (
    meta
    .query("corpus == 'anth' and 1850 <= year <= 1918 and geschichtslyrik == 1")
    .drop_duplicates(subset='author_title')
)
# Binarized variant produced by the project helper; used for the correlation
# and contingency analyses further down.
meta_anth_bin = binarize_meta(meta_anth)
In [4]:
# Comparison corpus: texts by four canonised modernist authors, restricted to
# 1850-1918 and to rows flagged as Geschichtslyrik, one row per `author_title`.
modcanon_authors = [
    'Hofmannsthal, Hugo von',
    'Rilke, Rainer Maria',
    'George, Stefan',
    'Heym, Georg',
]
meta_modcanon = (
    meta
    .query("author in @modcanon_authors and 1850 <= year <= 1918 and geschichtslyrik == 1")
    .drop_duplicates(subset='author_title')
)
In [5]:
# Comparison corpus: texts by the three listed authors (named "Münchhausen-Kreis"
# in the summary table), restricted to 1850-1918 and to rows flagged as
# Geschichtslyrik, one row per `author_title`.
muench_authors = [
    'Münchhausen, Börries von',
    'Miegel, Agnes',
    'Strauß und Torney, Lulu von',
]
meta_muench = (
    meta
    .query("author in @muench_authors and 1850 <= year <= 1918 and geschichtslyrik == 1")
    .drop_duplicates(subset='author_title')
)
In [6]:
# Empty frame that the following cell fills with one row of summary figures per sub-corpus.
sub_df = pd.DataFrame()
# Display names and metadata frames of the sub-corpora; both lists must stay in
# the same order because they are iterated pairwise with zip().
sub_names = ['Anthologien', 'Kanonisierte Moderne', 'Münchhausen-Kreis']
sub_metas = [meta_anth, meta_modcanon, meta_muench]
Merkmale berechnen¶
In [7]:
# Fill `sub_df` with one row per sub-corpus: rounded mean year, number of texts,
# the share of texts carrying each interpretation flag, and the number of flags
# per text (sum of the three flag counts divided by the number of texts).
deutung_features = ['heroismus', 'nationalismus', 'religiositaet']
for this_name, this_meta in zip(sub_names, sub_metas):
    n_texts = this_meta.shape[0]
    sub_df.loc[this_name, 'Jahr'] = round(this_meta['year'].mean(), 0)
    sub_df.loc[this_name, 'Texte'] = n_texts

    # Absolute number of texts per flag, reused for the shares and for the total.
    flag_counts = {
        feature: this_meta.query(f"{feature} == 1").shape[0]
        for feature in deutung_features
    }
    for feature, n_flagged in flag_counts.items():
        sub_df.loc[this_name, feature] = n_flagged / n_texts
    sub_df.loc[this_name, 'deutungen_per_text'] = sum(flag_counts.values()) / n_texts
In [8]:
# Show the sub-corpus summary with two decimals.
sub_df.round(2)
Out[8]:
| Jahr | Texte | heroismus | nationalismus | religiositaet | deutungen_per_text | |
|---|---|---|---|---|---|---|
| Anthologien | 1875.0 | 1850.0 | 0.28 | 0.14 | 0.17 | 0.59 |
| Kanonisierte Moderne | 1903.0 | 113.0 | 0.09 | 0.00 | 0.04 | 0.13 |
| Münchhausen-Kreis | 1905.0 | 140.0 | 0.06 | 0.01 | 0.04 | 0.12 |
Zeitverlauf¶
In [9]:
# Time-series frame without columns, indexed by every year from 1850 to 1918
# (the index is named 'year' so per-year series align on it).
ts = pd.DataFrame(index=pd.Series(range(1850, 1919), name='year'))
In [10]:
# Number of anthology texts per year; years without any text get 0.
texts_per_year = meta_anth.groupby('year').size()
ts['text_count'] = texts_per_year.reindex(ts.index).fillna(0)
# Smoothed text total from the project helper `smooth` (mode 'sum'); it is the
# denominator of all smoothed shares computed afterwards.
ts['text_sum'] = smooth(ts['text_count'], mode='sum')
In [11]:
# For each interpretation flag: yearly count of flagged texts, its smoothed sum
# (project helper `smooth`) and the smoothed share relative to `text_sum`.
deutung_features = ['heroismus', 'nationalismus', 'religiositaet']
for feature in deutung_features:
    ts[f'{feature}_count'] = [
        meta_anth.query(f"year == @x and {feature} == 1").shape[0]
        for x in ts.index
    ]
    ts[f'{feature}_sum'] = smooth(ts[f'{feature}_count'], mode='sum')
    ts[f'{feature}_share_smoothed'] = ts[f'{feature}_sum'] / ts['text_sum']

# All three flags together: a text carrying several flags is counted once per flag.
ts['deutungen_count'] = ts[[f'{feature}_count' for feature in deutung_features]].sum(axis=1)
ts['deutungen_sum'] = smooth(ts['deutungen_count'], mode='sum')
ts['deutungen_per_text_smoothed'] = ts['deutungen_sum'] / ts['text_sum']
Überblick¶
In [12]:
# Select the three smoothed share series and give them their display labels.
share_labels = {
    'heroismus_share_smoothed': 'Heroismus',
    'nationalismus_share_smoothed': 'Nationalismus',
    'religiositaet_share_smoothed': 'Religiosität',
}
meta_plot = ts[list(share_labels)].rename(columns=share_labels)

# Hand the plotted data to the project helper that persists it, then build the
# time-series figure including the sub-corpus values from `sub_df`.
save_ts_data(meta_plot, prefix='06_07_Deutungen_')
fig = create_ts_plot(
    data=meta_plot,
    columns=meta_plot.columns,
    y_axis_title='Anteil an Texten',
    add_corporas=sub_df,
    add_corpora_names=sub_names,
    add_corpora_categories=['heroismus', 'nationalismus', 'religiositaet'],
)
fig = update_fig_for_publication(fig)
fig.write_image("plots/6.7 Heroismus, Nationalismus und Religiosität im Zeitverlauf.pdf")
fig.show()
In [13]:
# Period flag: 0 for texts dated 1850-1884, 1 for everything else. The column is
# written to `meta_anth_bin` itself, so it also shows up in later analyses of
# that frame (e.g. the correlation listings).
in_first_period = meta_anth_bin['year'].between(1850, 1884)
meta_anth_bin['period'] = (~in_first_period).astype('int64').to_numpy()

# Compare the share of each interpretation flag between the two periods.
results = relations_binbin(
    meta=meta_anth_bin,
    main_feature='period',
    comp_features=['heroismus', 'nationalismus', 'religiositaet'],
)
results.sort_values(by='diff', ascending=False).round(2)
Out[13]:
| wenn_nicht | wenn_nicht_detail | wenn_ja | wenn_ja_detail | diff_low_bootstrap | diff_low | diff | diff_high | diff_high_bootstrap | chi2 | chi2_p | fisher_p | phi | min_real | min_expected | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| nationalismus | 0.13 | 181/1346 | 0.14 | 72/504 | -0.02 | -0.03 | 0.01 | 0.04 | 0.05 | 0.22 | 0.64 | 0.65 | 0.01 | 72.0 | 68.93 |
| religiositaet | 0.18 | 247/1346 | 0.14 | 71/504 | -0.08 | -0.08 | -0.04 | -0.01 | -0.00 | 4.68 | 0.03 | 0.03 | 0.05 | 71.0 | 86.63 |
| heroismus | 0.31 | 411/1346 | 0.22 | 112/504 | -0.13 | -0.13 | -0.08 | -0.04 | -0.04 | 12.50 | 0.00 | 0.00 | 0.08 | 112.0 | 142.48 |
In [14]:
# Texts from 1870/71: is nationalism associated with war as subject matter in
# texts that are not past-dominant?
meta_1870 = meta_anth.query("1870 <= year <= 1871").copy()
meta_1870['krieg_gegenwart'] = [
    1 if 'Krieg' in stoffgebiet and vergangenheitsdominant == 0 else 0
    for stoffgebiet, vergangenheitsdominant in zip(
        meta_1870['stoffgebiet'], meta_1870['vergangenheitsdominant']
    )
]
contingency_table = pd.crosstab(meta_1870['nationalismus'], meta_1870['krieg_gegenwart'])
print(contingency_table)
print("\n")
# Run the chi-square test once and read statistic and p-value from the same result.
chi2_result = chi2_contingency(contingency_table)
print(f"chi2 : {chi2_result[0]}")
print(f"chi2 p : {chi2_result[1]}")
print(f"phi : {get_phi(np.array(contingency_table))}")
krieg_gegenwart 0 1 nationalismus 0.0 38 14 1.0 27 30 chi2 : 6.436283331209948 chi2 p : 0.01118123465584003 phi : 0.2617175554294466
In [15]:
# Texts from 1897/98: is nationalism associated with texts whose entities
# mention 'Wilhelm I.' or 'Bismarck'?
# The subset gets its own name; the original reused `meta_1870` here and thereby
# overwrote the 1870/71 subset of the previous cell with 1897/98 data.
meta_1897 = meta_anth.query("1897 <= year <= 1898").copy()
meta_1897['wilhelm_bismarck'] = [
    1 if 'Wilhelm I.' in entities or 'Bismarck' in entities else 0
    for entities in meta_1897['entity_full']
]
contingency_table = pd.crosstab(meta_1897['nationalismus'], meta_1897['wilhelm_bismarck'])
print(contingency_table)
# Run Fisher's exact test once and read both values from the same result.
fisher_result = fisher_exact(contingency_table)
print(f"fisher : {fisher_result[0]}")
print(f"fisher p : {fisher_result[1]}")
print(f"phi : {get_phi(np.array(contingency_table))}")
wilhelm_bismarck 0 1 nationalismus 0.0 19 3 1.0 1 8 fisher : 50.666666666666664 fisher p : 0.0001664180316789496 phi : 0.7139081384319523
In [16]:
# Texts from 1914-1918: is nationalism associated with present-dominant war texts?
meta_1914 = meta_anth_bin.query("1914 <= year <= 1918").copy()
meta_1914['krieg_gegenwart'] = [
    1 if krieg == 1 and gegenwartsdominant == 1 else 0
    for krieg, gegenwartsdominant in zip(meta_1914['krieg'], meta_1914['gegenwartsdominant'])
]
contingency_table = pd.crosstab(meta_1914['nationalismus'], meta_1914['krieg_gegenwart'])
print(contingency_table)
print("\n")
# Run Fisher's exact test once and read both values from the same result.
fisher_result = fisher_exact(contingency_table)
print(f"fisher : {fisher_result[0]}")
print(f"fisher p : {fisher_result[1]}")
print(f"phi : {get_phi(np.array(contingency_table))}")
krieg_gegenwart 0 1 nationalismus 0.0 28 9 1.0 6 7 fisher : 3.6296296296296298 fisher p : 0.08242528415219422 phi : 0.2775980150199401
In [17]:
# Time series of the smoothed number of interpretation flags per text.
meta_plot = ts[['deutungen_per_text_smoothed']].rename(
    columns={'deutungen_per_text_smoothed': 'Deutungen pro Text'}
)
# NOTE(review): the y-axis title says 'Anteil an Texten', but the plotted value is
# a count of flags per text (sum of three flag counts / number of texts), not a
# share - confirm whether the label is intended.
fig = create_ts_plot(
    data=meta_plot,
    columns=meta_plot.columns,
    y_axis_title='Anteil an Texten',
    add_corporas=sub_df,
    add_corpora_names=sub_names,
    add_corpora_categories=['deutungen_per_text'],
)
fig.show()
In [18]:
# How do heroism and religiosity differ between texts with and without nationalism?
results = relations_binbin(
    meta=meta_anth_bin,
    main_feature='nationalismus',
    comp_features=['heroismus', 'religiositaet'],
)
results.round(2)
Out[18]:
| wenn_nicht | wenn_nicht_detail | wenn_ja | wenn_ja_detail | diff_low_bootstrap | diff_low | diff | diff_high | diff_high_bootstrap | chi2 | chi2_p | fisher_p | phi | min_real | min_expected | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| heroismus | 0.26 | 412/1597 | 0.44 | 111/253 | 0.11 | 0.12 | 0.18 | 0.25 | 0.24 | 35.19 | 0.0 | 0.0 | 0.14 | 111.0 | 71.52 |
| religiositaet | 0.15 | 240/1597 | 0.31 | 78/253 | 0.10 | 0.10 | 0.16 | 0.22 | 0.22 | 38.31 | 0.0 | 0.0 | 0.14 | 78.0 | 43.49 |
In [19]:
# How does religiosity differ between texts with and without heroism?
results = relations_binbin(
    meta=meta_anth_bin,
    main_feature='heroismus',
    comp_features=['religiositaet'],
)
results.round(2)
Out[19]:
| wenn_nicht | wenn_nicht_detail | wenn_ja | wenn_ja_detail | diff_low_bootstrap | diff_low | diff | diff_high | diff_high_bootstrap | chi2 | chi2_p | fisher_p | phi | min_real | min_expected | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| religiositaet | 0.16 | 211/1327 | 0.2 | 107/523 | 0.01 | 0.01 | 0.05 | 0.09 | 0.09 | 5.48 | 0.02 | 0.02 | 0.05 | 107.0 | 89.9 |
Heroismus¶
In [20]:
# Feature analysed in the following cells, which read this module-level name.
main_feature = 'heroismus'
In [21]:
# The 20 numeric columns with the highest correlation to the main feature
# (pandas' default correlation method).
main_feature_corr = meta_anth_bin.corr(numeric_only=True)[main_feature]
main_feature_corr.sort_values(ascending=False).head(20)
Out[21]:
heroismus 1.000000 unbekanntes_individuum_positiv 0.309271 entity_positiv 0.294308 bekanntes_individuum_positiv 0.277802 krieg_positiv 0.276537 krieg 0.248916 ueberlieferung_positiv 0.232681 kollektiv_positiv 0.221385 fixierbarkeit 0.179551 sprechinstanz_nicht_in_vergangenheit 0.176901 marker_count 0.168803 stoffgebiet_positiv 0.167379 ortmarker_vorhanden 0.164351 words 0.164269 liebe_negativ 0.158114 religion_positiv 0.155404 politik_positiv 0.150203 nationalismus 0.137914 nation_volk_d_positiv 0.135351 ueberlieferung 0.126413 Name: heroismus, dtype: float64
In [22]:
# The 20 numeric columns with the lowest (most negative) correlation to the
# main feature (pandas' default correlation method).
main_feature_corr = meta_anth_bin.corr(numeric_only=True)[main_feature]
main_feature_corr.sort_values(ascending=True).head(20)
Out[22]:
rollengedicht -0.186768 krieg_negativ -0.183105 entity_neutral -0.177260 sprechinstanz_in_vergangenheit -0.167335 stoffgebiet_neutral -0.144608 religion_negativ -0.097317 ueberlieferung_negativ -0.093884 unbekanntes_individuum_count -0.090588 religion -0.087040 sprechakt_beschreiben_vorhanden -0.087020 year_predict_ages_mean -0.084548 period -0.082185 decade -0.079516 year -0.078813 geschichtsauffassung_negativ -0.063062 antike -0.061992 nichtmensch_count -0.060986 stoffgebiet_negativ -0.058903 verfremdung -0.056760 in_hohem_mass_konkret -0.048917 Name: heroismus, dtype: float64
In [23]:
# Minimum strength used to pick comparison features here and as the minimum
# phi when the binary results are filtered.
threshold = 0.2
# Compute the correlation matrix once instead of once per `get_features` call;
# each call receives its own copy so the helper sees the same input as before.
main_feature_corr = meta_anth_bin.corr(numeric_only=True)[main_feature]
bin_comp_features = get_features(main_feature_corr.copy(), threshold=threshold, mode='bin')
cont_comp_features = get_features(main_feature_corr.copy(), threshold=threshold, mode='cont')
In [24]:
# Compare every selected binary feature between texts with and without the main feature.
results = relations_binbin(meta=meta_anth_bin, main_feature=main_feature, comp_features=bin_comp_features)
In [25]:
# Features excluded from the filtered table because they overlap with other rows.
directly_related = [
    'entity_positiv',  # related to other entities
]
# Keep rows that are significant (chi2 p < 0.05), have a minimum expected cell
# count of at least 5 and reach the phi threshold.
not_excluded = ~results.index.isin(directly_related)
is_relevant = (
    (results['chi2_p'] < 0.05)
    & (results['min_expected'] >= 5)
    & (results['phi'] >= threshold)
)
results_filtered = results[not_excluded & is_relevant].sort_values(by='diff', ascending=False)
results_filtered.round(2)
Out[25]:
| wenn_nicht | wenn_nicht_detail | wenn_ja | wenn_ja_detail | diff_low_bootstrap | diff_low | diff | diff_high | diff_high_bootstrap | chi2 | chi2_p | fisher_p | phi | min_real | min_expected | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| krieg_positiv | 0.41 | 193/469 | 0.69 | 227/328 | 0.21 | 0.21 | 0.28 | 0.35 | 0.35 | 60.95 | 0.0 | 0.0 | 0.28 | 101.0 | 155.15 |
| krieg | 0.35 | 469/1327 | 0.63 | 328/523 | 0.22 | 0.22 | 0.27 | 0.32 | 0.32 | 114.62 | 0.0 | 0.0 | 0.25 | 195.0 | 225.31 |
| unbekanntes_individuum_positiv | 0.29 | 148/506 | 0.55 | 69/125 | 0.16 | 0.16 | 0.26 | 0.36 | 0.36 | 29.92 | 0.0 | 0.0 | 0.22 | 56.0 | 42.99 |
| bekanntes_individuum_positiv | 0.49 | 674/1370 | 0.73 | 478/655 | 0.19 | 0.19 | 0.24 | 0.28 | 0.28 | 102.17 | 0.0 | 0.0 | 0.22 | 177.0 | 282.38 |
| ueberlieferung_positiv | 0.47 | 126/266 | 0.71 | 118/166 | 0.14 | 0.15 | 0.24 | 0.33 | 0.32 | 23.39 | 0.0 | 0.0 | 0.23 | 48.0 | 72.24 |
In [26]:
# All remaining rows, i.e. those that did not make it into `results_filtered`.
results_other = results[~results.index.isin(results_filtered.index)]
results_other.sort_values(by='diff', ascending=False).round(2)
Out[26]:
| wenn_nicht | wenn_nicht_detail | wenn_ja | wenn_ja_detail | diff_low_bootstrap | diff_low | diff | diff_high | diff_high_bootstrap | chi2 | chi2_p | fisher_p | phi | min_real | min_expected | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| entity_positiv | 0.41 | 1136/2758 | 0.63 | 735/1158 | 0.19 | 0.19 | 0.22 | 0.26 | 0.26 | 162.29 | 0.00 | 0.00 | 0.20 | 423.0 | 553.27 |
| stoffgebiet_positiv | 0.41 | 757/1825 | 0.60 | 427/715 | 0.14 | 0.14 | 0.18 | 0.22 | 0.23 | 68.69 | 0.00 | 0.00 | 0.16 | 288.0 | 333.29 |
| kollektiv_positiv | 0.32 | 230/724 | 0.49 | 166/340 | 0.11 | 0.11 | 0.17 | 0.23 | 0.23 | 28.80 | 0.00 | 0.00 | 0.16 | 166.0 | 126.54 |
| stoffgebiet_ambivalent | 0.14 | 248/1825 | 0.13 | 90/715 | -0.04 | -0.04 | -0.01 | 0.02 | 0.02 | 0.45 | 0.50 | 0.52 | 0.01 | 90.0 | 95.15 |
| entity_ambivalent | 0.07 | 186/2758 | 0.05 | 58/1158 | -0.03 | -0.03 | -0.02 | -0.00 | -0.00 | 4.20 | 0.04 | 0.04 | 0.03 | 58.0 | 72.15 |
| unbekanntes_individuum_negativ | 0.13 | 68/506 | 0.10 | 13/125 | -0.09 | -0.09 | -0.03 | 0.03 | 0.03 | 0.83 | 0.36 | 0.46 | 0.04 | 13.0 | 16.05 |
| entity_negativ | 0.17 | 481/2758 | 0.14 | 163/1158 | -0.06 | -0.06 | -0.03 | -0.01 | -0.01 | 6.72 | 0.01 | 0.01 | 0.04 | 163.0 | 190.44 |
| bekanntes_individuum_negativ | 0.15 | 204/1370 | 0.11 | 72/655 | -0.07 | -0.07 | -0.04 | -0.01 | -0.01 | 5.72 | 0.02 | 0.02 | 0.05 | 72.0 | 89.27 |
| kollektiv_negativ | 0.27 | 193/724 | 0.22 | 75/340 | -0.10 | -0.10 | -0.05 | 0.01 | 0.01 | 2.60 | 0.11 | 0.11 | 0.05 | 75.0 | 85.64 |
| stoffgebiet_negativ | 0.22 | 410/1825 | 0.18 | 126/715 | -0.08 | -0.08 | -0.05 | -0.01 | -0.02 | 7.24 | 0.01 | 0.01 | 0.05 | 126.0 | 150.88 |
| stoffgebiet_neutral | 0.22 | 410/1825 | 0.10 | 72/715 | -0.15 | -0.15 | -0.12 | -0.09 | -0.09 | 51.34 | 0.00 | 0.00 | 0.14 | 72.0 | 135.68 |
| entity_neutral | 0.35 | 955/2758 | 0.17 | 202/1158 | -0.20 | -0.20 | -0.17 | -0.14 | -0.14 | 115.68 | 0.00 | 0.00 | 0.17 | 202.0 | 342.14 |
In [27]:
# Repeat the comparison separately for 1850-1884 and 1885-1918 and put both
# periods side by side; `diff_of_diffs` / `diff_of_phis` are later minus earlier.
result_categories = ['wenn_nicht', 'wenn_nicht_detail', 'wenn_ja', 'wenn_ja_detail', 'diff', 'chi2_p', 'phi']
meta_first_period = meta_anth_bin.query("1850 <= year <= 1884")
meta_second_period = meta_anth_bin.query("1885 <= year <= 1918")
results_a = relations_binbin(
    meta=meta_first_period,
    main_feature=main_feature,
    comp_features=results_filtered.index,
)
results_b = relations_binbin(
    meta=meta_second_period,
    main_feature=main_feature,
    comp_features=results_filtered.index,
)
results_merged = (
    results_a[result_categories]
    .join(results_b[result_categories], lsuffix='_1850', rsuffix='_1885')
    .assign(
        diff_of_diffs=lambda merged: merged['diff_1885'] - merged['diff_1850'],
        diff_of_phis=lambda merged: merged['phi_1885'] - merged['phi_1850'],
    )
)
results_merged.sort_values(by='diff_of_phis').round(3)
Out[27]:
| wenn_nicht_1850 | wenn_nicht_detail_1850 | wenn_ja_1850 | wenn_ja_detail_1850 | diff_1850 | chi2_p_1850 | phi_1850 | wenn_nicht_1885 | wenn_nicht_detail_1885 | wenn_ja_1885 | wenn_ja_detail_1885 | diff_1885 | chi2_p_1885 | phi_1885 | diff_of_diffs | diff_of_phis | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| krieg | 0.313 | 293/935 | 0.642 | 264/411 | 0.329 | 0.000 | 0.308 | 0.449 | 176/392 | 0.571 | 64/112 | 0.122 | 0.022 | 0.102 | -0.207 | -0.206 |
| unbekanntes_individuum_positiv | 0.320 | 98/306 | 0.588 | 47/80 | 0.267 | 0.000 | 0.224 | 0.250 | 50/200 | 0.489 | 22/45 | 0.239 | 0.001 | 0.203 | -0.028 | -0.021 |
| krieg_positiv | 0.440 | 129/293 | 0.697 | 184/264 | 0.257 | 0.000 | 0.258 | 0.364 | 64/176 | 0.672 | 43/64 | 0.308 | 0.000 | 0.274 | 0.052 | 0.016 |
| bekanntes_individuum_positiv | 0.511 | 528/1033 | 0.735 | 399/543 | 0.224 | 0.000 | 0.216 | 0.433 | 146/337 | 0.705 | 79/112 | 0.272 | 0.000 | 0.235 | 0.048 | 0.020 |
| ueberlieferung_positiv | 0.533 | 97/182 | 0.721 | 93/129 | 0.188 | 0.001 | 0.190 | 0.345 | 29/84 | 0.676 | 25/37 | 0.330 | 0.001 | 0.306 | 0.142 | 0.116 |
In [28]:
# Compare every selected continuous feature between texts with and without the main feature.
results = relations_bincont(meta=meta_anth_bin, main_feature=main_feature, comp_features=cont_comp_features)
In [29]:
# Display the result of the continuous-feature comparison assigned to `results` in the preceding cell.
results
Out[29]:
Nationalismus¶
Ländervergleich¶
In [30]:
# Groups compared below; 'corpus' is not a country but stands for the whole
# anthology corpus (the following loop handles it as a special case).
countries = ['Deutschland', 'Österreich', 'Schweiz', 'corpus']
In [31]:
# Per country (and for the whole corpus): number of authors and texts, and how
# many texts are / are not flagged as nationalistic.
results = pd.DataFrame()
for country in countries:
    if country != 'corpus':
        # Flag column on `meta_anth` itself (the pairwise tests in the next cell
        # read it): 1 if the country occurs in `author_gnd_countries`, 0 if not,
        # NaN if no country information is available.
        meta_anth[country] = [
            float('NaN') if not pd.notna(gnd_countries)
            else (1 if country in gnd_countries else 0)
            for gnd_countries in meta_anth['author_gnd_countries']
        ]
        meta_anth_country = meta_anth[meta_anth[country] == 1]
    else:
        meta_anth_country = meta_anth

    is_nationalistic = meta_anth_country['nationalismus'] == 1
    is_not_nationalistic = meta_anth_country['nationalismus'] == 0
    results.loc[country, 'authors'] = meta_anth_country['author'].nunique()
    results.loc[country, 'texts'] = meta_anth_country.shape[0]
    results.loc[country, 'nationalismus'] = is_nationalistic.sum()
    results.loc[country, 'kein_nationalismus'] = is_not_nationalistic.sum()

results['nationalismus_share'] = results['nationalismus'] / results['texts']
results
Out[31]:
| authors | texts | nationalismus | kein_nationalismus | nationalismus_share | |
|---|---|---|---|---|---|
| Deutschland | 407.0 | 1513.0 | 217.0 | 1296.0 | 0.143424 |
| Österreich | 76.0 | 242.0 | 31.0 | 211.0 | 0.128099 |
| Schweiz | 31.0 | 116.0 | 7.0 | 109.0 | 0.060345 |
| corpus | 528.0 | 1850.0 | 253.0 | 1597.0 | 0.136757 |
In [32]:
# Pairwise chi-square tests between the countries (the 'corpus' pseudo-entry is
# dropped). Relies on the per-country flag columns that the previous cell added
# to `meta_anth`.
countries = [x for x in countries if x != 'corpus']
pairs = list(combinations(countries, 2))
for country_a, country_b in pairs:
    # One row per country: [nationalistic texts, non-nationalistic texts].
    contingency_table = []
    for country_name in (country_a, country_b):
        country_texts = meta_anth[meta_anth[country_name] == 1]
        contingency_table.append([
            (country_texts['nationalismus'] == 1).sum(),
            (country_texts['nationalismus'] == 0).sum(),
        ])
    contingency_table_df = pd.DataFrame(
        contingency_table,
        index=[country_a, country_b],
        columns=['nationalismus', 'kein_nationalismus'],
    )
    chi2 = chi2_contingency(contingency_table_df)
    print(f"{country_a} vs. {country_b}")
    print(contingency_table_df)
    print(f"chi2 : {chi2[0]}")
    print(f"chi2 p : {chi2[1]}")
    print("\n")
Deutschland vs. Österreich
nationalismus kein_nationalismus
Deutschland 217 1296
Österreich 31 211
chi2 : 0.2873575793383042
chi2 p : 0.5919188527980483
Deutschland vs. Schweiz
nationalismus kein_nationalismus
Deutschland 217 1296
Schweiz 7 109
chi2 : 5.589164638365113
chi2 p : 0.018071912701037213
Österreich vs. Schweiz
nationalismus kein_nationalismus
Österreich 31 211
Schweiz 7 109
chi2 : 3.1134840942369983
chi2 p : 0.07764669880025785
Annotierte Merkmale¶
In [33]:
# Feature analysed in the following cells, which read this module-level name.
main_feature = 'nationalismus'
In [34]:
# The 20 numeric columns with the highest correlation to the main feature
# (pandas' default correlation method).
main_feature_corr = meta_anth_bin.corr(numeric_only=True)[main_feature]
main_feature_corr.sort_values(ascending=False).head(20)
Out[34]:
nationalismus 1.000000 nation_volk_d 0.416343 gegenwartsdominant 0.396130 gegenwartsbezug 0.370030 sprechinstanz_nicht_in_vergangenheit 0.308706 krieg_positiv 0.298258 wissen_identisch 0.282898 kollektiv_positiv 0.256427 stoffgebiet_positiv 0.254551 sprechakt_behaupten_vorhanden 0.253730 neuzeit 0.246590 ende 0.245150 zeit_mitte 0.243299 nogenre 0.241031 beginn 0.236099 politik_positiv 0.228762 tod_positiv 0.222436 nation_volk_d_positiv 0.214259 zeitebenen 0.208573 sprechinstanz_markiert 0.196863 Name: nationalismus, dtype: float64
In [35]:
# The 20 numeric columns with the lowest (most negative) correlation to the
# main feature (pandas' default correlation method).
main_feature_corr = meta_anth_bin.corr(numeric_only=True)[main_feature]
main_feature_corr.sort_values(ascending=True).head(20)
Out[35]:
wissen_ergaenzend -0.304694 in_hohem_mass_konkret -0.244200 konkretheit -0.239934 ballade -0.214500 geschichtsauffassung_negativ -0.195438 nation_volk_d_negativ -0.173990 mittelalter -0.170806 sprechakt_erzaehlen_vorhanden -0.168332 krieg_negativ -0.157424 stoffgebiet_neutral -0.153114 ereignis -0.142967 entity_neutral -0.139717 religion -0.127650 antike -0.126415 sprechinstanz_in_vergangenheit -0.110215 stoffgebiet_negativ -0.103728 unbekanntes_individuum_count -0.100729 unbekanntes_individuum_negativ -0.095226 kleinraum_count -0.092309 tod_negativ -0.076042 Name: nationalismus, dtype: float64
In [36]:
# Minimum strength used to pick comparison features here and as the minimum
# phi when the binary results are filtered.
threshold = 0.2
# Compute the correlation matrix once instead of once per `get_features` call;
# each call receives its own copy so the helper sees the same input as before.
main_feature_corr = meta_anth_bin.corr(numeric_only=True)[main_feature]
bin_comp_features = get_features(main_feature_corr.copy(), threshold=threshold, mode='bin')
# Always include these three features in the binary comparison, in addition to
# the ones selected by `get_features`.
bin_comp_features = bin_comp_features + ['religiositaet', 'heroismus', 'sprechinstanz_markiert']
cont_comp_features = get_features(main_feature_corr.copy(), threshold=threshold, mode='cont')
In [37]:
# Compare every selected binary feature between texts with and without the main feature.
results = relations_binbin(meta=meta_anth_bin, main_feature=main_feature, comp_features=bin_comp_features)
In [38]:
# Features excluded from the filtered table because they overlap with a feature
# that is kept.
directly_related = [
    'gegenwartsdominant', 'sprechinstanz_nicht_in_vergangenheit',  # related to gegenwartsbezug
    'wissen_ergaenzend',  # related to wissen_identisch
    'ballade',  # related to nogenre
]
# Keep rows that are significant (chi2 p < 0.05), have a minimum expected cell
# count of at least 5 and reach the phi threshold.
not_excluded = ~results.index.isin(directly_related)
is_relevant = (
    (results['chi2_p'] < 0.05)
    & (results['min_expected'] >= 5)
    & (results['phi'] >= threshold)
)
results_filtered = results[not_excluded & is_relevant].sort_values(by='diff', ascending=False)
results_filtered.round(2)
Out[38]:
| wenn_nicht | wenn_nicht_detail | wenn_ja | wenn_ja_detail | diff_low_bootstrap | diff_low | diff | diff_high | diff_high_bootstrap | chi2 | chi2_p | fisher_p | phi | min_real | min_expected | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gegenwartsbezug | 0.22 | 356/1597 | 0.71 | 180/253 | 0.43 | 0.43 | 0.49 | 0.55 | 0.54 | 253.31 | 0.0 | 0.0 | 0.37 | 73.0 | 73.30 |
| krieg_positiv | 0.46 | 305/663 | 0.86 | 115/134 | 0.33 | 0.33 | 0.40 | 0.47 | 0.47 | 70.90 | 0.0 | 0.0 | 0.30 | 19.0 | 63.39 |
| neuzeit | 0.49 | 786/1597 | 0.85 | 215/253 | 0.31 | 0.31 | 0.36 | 0.41 | 0.41 | 112.49 | 0.0 | 0.0 | 0.25 | 38.0 | 116.11 |
| stoffgebiet_positiv | 0.42 | 915/2181 | 0.75 | 269/359 | 0.28 | 0.28 | 0.33 | 0.38 | 0.38 | 134.71 | 0.0 | 0.0 | 0.23 | 90.0 | 167.34 |
| politik_positiv | 0.51 | 118/232 | 0.80 | 41/51 | 0.17 | 0.17 | 0.30 | 0.42 | 0.41 | 14.81 | 0.0 | 0.0 | 0.23 | 10.0 | 22.35 |
| nogenre | 0.19 | 300/1597 | 0.48 | 122/253 | 0.23 | 0.23 | 0.29 | 0.36 | 0.36 | 107.48 | 0.0 | 0.0 | 0.24 | 122.0 | 57.71 |
| kollektiv_positiv | 0.32 | 282/878 | 0.61 | 114/186 | 0.22 | 0.22 | 0.29 | 0.37 | 0.37 | 55.90 | 0.0 | 0.0 | 0.23 | 72.0 | 69.23 |
| wissen_identisch | 0.10 | 165/1597 | 0.39 | 99/253 | 0.22 | 0.23 | 0.29 | 0.35 | 0.35 | 148.06 | 0.0 | 0.0 | 0.28 | 99.0 | 36.10 |
| sprechakt_behaupten_vorhanden | 0.15 | 235/1597 | 0.43 | 110/253 | 0.22 | 0.22 | 0.29 | 0.35 | 0.35 | 119.10 | 0.0 | 0.0 | 0.25 | 110.0 | 47.18 |
| nation_volk_d | 0.00 | 3/1597 | 0.21 | 53/253 | 0.16 | 0.16 | 0.21 | 0.26 | 0.26 | 320.68 | 0.0 | 0.0 | 0.42 | 3.0 | 7.66 |
| in_hohem_mass_konkret | 0.86 | 1366/1597 | 0.58 | 147/253 | -0.33 | -0.34 | -0.27 | -0.21 | -0.22 | 110.32 | 0.0 | 0.0 | 0.24 | 106.0 | 46.09 |
In [39]:
# All remaining rows, i.e. those that did not make it into `results_filtered`.
results_other = results[~results.index.isin(results_filtered.index)]
results_other.sort_values(by='diff', ascending=False).round(2)
Out[39]:
| wenn_nicht | wenn_nicht_detail | wenn_ja | wenn_ja_detail | diff_low_bootstrap | diff_low | diff | diff_high | diff_high_bootstrap | chi2 | chi2_p | fisher_p | phi | min_real | min_expected | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| nation_volk_d_positiv | 0.33 | 1/3 | 0.75 | 40/53 | -0.21 | -0.12 | 0.42 | 0.97 | 0.83 | 2.57 | 0.11 | 0.17 | 0.21 | 1.0 | 0.80 |
| gegenwartsdominant | 0.09 | 142/1597 | 0.49 | 125/253 | 0.34 | 0.34 | 0.41 | 0.47 | 0.47 | 290.30 | 0.00 | 0.00 | 0.40 | 125.0 | 36.51 |
| sprechinstanz_nicht_in_vergangenheit | 0.22 | 355/1597 | 0.62 | 158/253 | 0.34 | 0.34 | 0.40 | 0.47 | 0.46 | 176.30 | 0.00 | 0.00 | 0.31 | 95.0 | 70.16 |
| sprechinstanz_markiert | 0.40 | 638/1597 | 0.68 | 173/253 | 0.22 | 0.22 | 0.28 | 0.35 | 0.35 | 71.70 | 0.00 | 0.00 | 0.20 | 80.0 | 110.91 |
| entity_positiv | 0.45 | 1519/3401 | 0.68 | 352/515 | 0.19 | 0.19 | 0.24 | 0.28 | 0.28 | 100.57 | 0.00 | 0.00 | 0.16 | 163.0 | 246.06 |
| bekanntes_individuum_positiv | 0.54 | 966/1781 | 0.76 | 186/244 | 0.16 | 0.16 | 0.22 | 0.28 | 0.28 | 42.31 | 0.00 | 0.00 | 0.14 | 58.0 | 105.19 |
| tod_positiv | 0.09 | 17/191 | 0.31 | 8/26 | 0.04 | 0.04 | 0.22 | 0.40 | 0.39 | 10.74 | 0.00 | 0.00 | 0.22 | 8.0 | 3.00 |
| heroismus | 0.26 | 412/1597 | 0.44 | 111/253 | 0.12 | 0.12 | 0.18 | 0.25 | 0.25 | 35.19 | 0.00 | 0.00 | 0.14 | 111.0 | 71.52 |
| unbekanntes_individuum_positiv | 0.33 | 196/590 | 0.51 | 21/41 | 0.02 | 0.02 | 0.18 | 0.34 | 0.33 | 5.50 | 0.02 | 0.03 | 0.09 | 20.0 | 14.10 |
| religiositaet | 0.15 | 240/1597 | 0.31 | 78/253 | 0.10 | 0.10 | 0.16 | 0.22 | 0.22 | 38.31 | 0.00 | 0.00 | 0.14 | 78.0 | 43.49 |
| entity_ambivalent | 0.07 | 225/3401 | 0.04 | 19/515 | -0.05 | -0.05 | -0.03 | -0.01 | -0.01 | 6.56 | 0.01 | 0.01 | 0.04 | 19.0 | 32.09 |
| stoffgebiet_ambivalent | 0.14 | 301/2181 | 0.10 | 37/359 | -0.07 | -0.07 | -0.03 | -0.00 | -0.00 | 3.26 | 0.07 | 0.08 | 0.04 | 37.0 | 47.77 |
| entity_negativ | 0.17 | 583/3401 | 0.12 | 61/515 | -0.08 | -0.08 | -0.05 | -0.02 | -0.02 | 9.13 | 0.00 | 0.00 | 0.05 | 61.0 | 84.69 |
| bekanntes_individuum_negativ | 0.14 | 255/1781 | 0.09 | 21/244 | -0.09 | -0.10 | -0.06 | -0.02 | -0.02 | 5.95 | 0.01 | 0.01 | 0.05 | 21.0 | 33.26 |
| kollektiv_negativ | 0.27 | 234/878 | 0.18 | 34/186 | -0.14 | -0.15 | -0.08 | -0.02 | -0.02 | 5.71 | 0.02 | 0.02 | 0.07 | 34.0 | 46.85 |
| unbekanntes_individuum_negativ | 0.14 | 80/590 | 0.02 | 1/41 | -0.16 | -0.17 | -0.11 | -0.06 | -0.05 | 4.24 | 0.04 | 0.05 | 0.08 | 1.0 | 5.26 |
| stoffgebiet_negativ | 0.23 | 497/2181 | 0.11 | 39/359 | -0.16 | -0.16 | -0.12 | -0.08 | -0.08 | 26.33 | 0.00 | 0.00 | 0.10 | 39.0 | 75.76 |
| entity_neutral | 0.32 | 1074/3401 | 0.16 | 83/515 | -0.19 | -0.19 | -0.15 | -0.12 | -0.12 | 51.37 | 0.00 | 0.00 | 0.11 | 83.0 | 152.16 |
| stoffgebiet_neutral | 0.21 | 468/2181 | 0.04 | 14/359 | -0.20 | -0.20 | -0.18 | -0.15 | -0.15 | 61.81 | 0.00 | 0.00 | 0.16 | 14.0 | 68.13 |
| ballade | 0.60 | 962/1597 | 0.29 | 74/253 | -0.37 | -0.37 | -0.31 | -0.25 | -0.25 | 85.12 | 0.00 | 0.00 | 0.21 | 74.0 | 111.32 |
| wissen_ergaenzend | 0.80 | 1278/1597 | 0.42 | 105/253 | -0.45 | -0.45 | -0.39 | -0.32 | -0.32 | 171.75 | 0.00 | 0.00 | 0.30 | 105.0 | 63.87 |
In [40]:
# Repeat the comparison separately for 1850-1884 and 1885-1918 and put both
# periods side by side; `diff_of_diffs` / `diff_of_phis` are later minus earlier.
result_categories = ['wenn_nicht', 'wenn_nicht_detail', 'wenn_ja', 'wenn_ja_detail', 'diff', 'chi2_p', 'phi']
meta_first_period = meta_anth_bin.query("1850 <= year <= 1884")
meta_second_period = meta_anth_bin.query("1885 <= year <= 1918")
results_a = relations_binbin(
    meta=meta_first_period,
    main_feature=main_feature,
    comp_features=results_filtered.index,
)
results_b = relations_binbin(
    meta=meta_second_period,
    main_feature=main_feature,
    comp_features=results_filtered.index,
)
results_merged = (
    results_a[result_categories]
    .join(results_b[result_categories], lsuffix='_1850', rsuffix='_1885')
    .assign(
        diff_of_diffs=lambda merged: merged['diff_1885'] - merged['diff_1850'],
        diff_of_phis=lambda merged: merged['phi_1885'] - merged['phi_1850'],
    )
)
results_merged.sort_values(by='diff_of_phis').round(3)
Out[40]:
| wenn_nicht_1850 | wenn_nicht_detail_1850 | wenn_ja_1850 | wenn_ja_detail_1850 | diff_1850 | chi2_p_1850 | phi_1850 | wenn_nicht_1885 | wenn_nicht_detail_1885 | wenn_ja_1885 | wenn_ja_detail_1885 | diff_1885 | chi2_p_1885 | phi_1885 | diff_of_diffs | diff_of_phis | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| neuzeit | 0.458 | 533/1165 | 0.829 | 150/181 | 0.371 | 0.000 | 0.253 | 0.586 | 253/432 | 0.903 | 65/72 | 0.317 | 0.000 | 0.230 | -0.054 | -0.023 |
| gegenwartsbezug | 0.224 | 261/1165 | 0.718 | 130/181 | 0.494 | 0.000 | 0.371 | 0.220 | 95/432 | 0.694 | 50/72 | 0.475 | 0.000 | 0.367 | -0.020 | -0.005 |
| kollektiv_positiv | 0.325 | 221/679 | 0.616 | 90/146 | 0.291 | 0.000 | 0.229 | 0.307 | 61/199 | 0.600 | 24/40 | 0.293 | 0.000 | 0.229 | 0.003 | -0.000 |
| nation_volk_d | 0.001 | 1/1165 | 0.199 | 36/181 | 0.198 | 0.000 | 0.413 | 0.005 | 2/432 | 0.236 | 17/72 | 0.231 | 0.000 | 0.425 | 0.033 | 0.012 |
| nogenre | 0.158 | 184/1165 | 0.425 | 77/181 | 0.267 | 0.000 | 0.231 | 0.269 | 116/432 | 0.625 | 45/72 | 0.356 | 0.000 | 0.268 | 0.089 | 0.037 |
| sprechakt_behaupten_vorhanden | 0.138 | 161/1165 | 0.403 | 73/181 | 0.265 | 0.000 | 0.239 | 0.171 | 74/432 | 0.514 | 37/72 | 0.343 | 0.000 | 0.289 | 0.077 | 0.051 |
| krieg_positiv | 0.498 | 229/460 | 0.866 | 84/97 | 0.368 | 0.000 | 0.281 | 0.374 | 76/203 | 0.838 | 31/37 | 0.463 | 0.000 | 0.337 | 0.095 | 0.055 |
| stoffgebiet_positiv | 0.455 | 730/1603 | 0.769 | 193/251 | 0.314 | 0.000 | 0.215 | 0.320 | 185/578 | 0.704 | 76/108 | 0.384 | 0.000 | 0.288 | 0.070 | 0.073 |
| in_hohem_mass_konkret | 0.857 | 998/1165 | 0.613 | 111/181 | -0.243 | 0.000 | 0.218 | 0.852 | 368/432 | 0.500 | 36/72 | -0.352 | 0.000 | 0.309 | -0.108 | 0.091 |
| wissen_identisch | 0.099 | 115/1165 | 0.337 | 61/181 | 0.238 | 0.000 | 0.241 | 0.116 | 50/432 | 0.528 | 38/72 | 0.412 | 0.000 | 0.380 | 0.174 | 0.139 |
| politik_positiv | 0.508 | 94/185 | 0.750 | 27/36 | 0.242 | 0.008 | 0.179 | 0.511 | 24/47 | 0.933 | 14/15 | 0.423 | 0.003 | 0.372 | 0.181 | 0.192 |
In [41]:
# Compare every selected continuous feature between texts with and without the main feature.
results = relations_bincont(meta=meta_anth_bin, main_feature=main_feature, comp_features=cont_comp_features)
In [42]:
# Continuous-feature comparison, strongest positive point-biserial correlation first.
results.sort_values(by='pointbiserialr_corr', ascending=False).round(2)
Out[42]:
| wenn_nicht | a_merkmal=0 | a_merkmal=1 | a_merkmal=2 | a_merkmal=3 | a_merkmal>=4 | wenn_ja | b_merkmal=0 | b_merkmal=1 | b_merkmal=2 | ... | pointbiserialr_corr | pointbiserialr_p | ttest_p | cohens_d | mannwhitneyu_stat | mannwhitneyu_p | meandiffs_ci_lower | meandiffs_ci_bootstrap_lower | meandiffs_ci_upper | meandiffs_ci_bootstrap_upper | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ende | 1244.84 | 0.0 [5/1590] | 0.0 [0/1590] | 0.0 [0/1590] | 0.0 [0/1590] | 0.92 [1459/1590] | 1727.17 | 0.0 [0/252] | 0.0 [0/252] | 0.0 [0/252] | ... | 0.25 | 0.0 | 0.0 | -0.88 | 73108.5 | 0.0 | 395.12 | 426.29 | 569.54 | 535.68 |
| zeit_mitte | 1231.84 | 0.0 [7/1590] | 0.0 [0/1590] | 0.0 [0/1590] | 0.0 [0/1590] | 0.92 [1455/1590] | 1714.99 | 0.0 [0/252] | 0.0 [0/252] | 0.0 [0/252] | ... | 0.24 | 0.0 | 0.0 | -0.87 | 74364.5 | 0.0 | 395.08 | 424.83 | 571.21 | 537.16 |
| beginn | 1218.85 | 0.01 [9/1590] | 0.0 [0/1590] | 0.0 [0/1590] | 0.0 [0/1590] | 0.91 [1446/1590] | 1702.81 | 0.0 [0/252] | 0.0 [0/252] | 0.0 [0/252] | ... | 0.24 | 0.0 | 0.0 | -0.83 | 75685.0 | 0.0 | 392.88 | 419.01 | 575.02 | 539.69 |
| zeitebenen | 1.95 | 0.0 [0/1597] | 0.31 [498/1597] | 0.46 [742/1597] | 0.19 [296/1597] | 0.04 [61/1597] | 2.47 | 0.0 [0/253] | 0.11 [28/253] | 0.44 [112/253] | ... | 0.21 | 0.0 | 0.0 | -0.60 | 137540.5 | 0.0 | 0.41 | 0.40 | 0.63 | 0.64 |
| konkretheit | 0.92 | 0.01 [16/1597] | 0.86 [1366/1597] | 0.0 [0/1597] | 0.0 [0/1597] | 0.0 [0/1597] | 0.77 | 0.04 [9/253] | 0.58 [147/253] | 0.0 [0/253] | ... | -0.24 | 0.0 | 0.0 | 0.62 | 257631.5 | 0.0 | -0.18 | -0.19 | -0.12 | -0.11 |
5 rows × 22 columns
In [43]:
# One grouped histogram per continuous comparison feature, split into texts with
# and without the main feature; the legend shows each group's mean.
#
# The group means are computed from the unmodified `meta_anth_bin`. Previously
# `zeit_mitte` was clipped inside the loop on the same frame the means were read
# from, so the mean shown for `zeit_mitte` was the unclipped one only if that
# feature happened to come first in `cont_comp_features`.
meta_plot = meta_anth_bin.copy()
if 'zeit_mitte' in meta_plot.columns:
    # Clip only the plotting copy: values below 0 are drawn at 0.
    meta_plot['zeit_mitte'] = meta_plot['zeit_mitte'].clip(lower=0)

has_main = meta_anth_bin[main_feature] == 1
has_not_main = meta_anth_bin[main_feature] == 0

for cont_comp_feature in cont_comp_features:
    mean_main = meta_anth_bin.loc[has_main, cont_comp_feature].mean()
    mean_notmain = meta_anth_bin.loc[has_not_main, cont_comp_feature].mean()

    # `zeit_mitte` is shown without decimals, every other feature with two.
    if cont_comp_feature == 'zeit_mitte':
        label_main = f"Nationalistische Texte<br>(Mittelwert = {round(mean_main)})"
        label_notmain = f"Nicht nationalistische Texte<br>(Mittelwert = {round(mean_notmain)})"
    else:
        label_main = f"Nationalistische Texte<br>(Mittelwert = {round(mean_main, 2)})"
        label_notmain = f"Nicht nationalistische Texte<br>(Mittelwert = {round(mean_notmain, 2)})"
    meta_plot['plot_legend'] = [label_main if x == 1 else label_notmain for x in meta_plot[main_feature]]

    fig = px.histogram(
        meta_plot,
        x = cont_comp_feature,
        color = 'plot_legend',
        histnorm = 'probability density',
        barmode = 'group',
        labels = {'plot_legend' : '',
                  'stoffgebiet_positiv' : 'Anzahl positiv bewertete Stoffgebiete',
                  'zeit_mitte' : 'Mitte der dominanten Zeitebene',
                 }
    )
    # `title=dict(font=...)` replaces the deprecated `titlefont` attribute; the
    # update is merged into the axis, so the title text itself is kept.
    fig.update_layout(
        width = 700, height = 300,
        yaxis_title="Anteil",
        xaxis=dict(tickfont=dict(size=16), title=dict(font=dict(size=16))),
        yaxis=dict(tickfont=dict(size=16), title=dict(font=dict(size=16))),
        legend=dict(font = dict(size=16), x=0.5, y = 0.96),
        bargap=0.1
    )
    if cont_comp_feature == 'zeit_mitte':
        # Different legend position for this feature only.
        fig.update_layout(legend=dict(x=0.15, y = 0.9),)
    # Export is currently disabled:
    # fig.write_image(f"plots/6.7 Nationalismus – {cont_comp_feature}.pdf")
    fig.show()
In [44]:
# Mean of `behandelt_deutschen_mittelraum` in the binarized anthology corpus
# (presumably a 0/1 flag, so the mean reads as a share - verify).
meta_anth_bin['behandelt_deutschen_mittelraum'].mean()
Out[44]:
0.6816216216216217
In [45]:
# Same figure for the Münchhausen-Kreis corpus, binarized on the fly.
binarize_meta(meta_muench)['behandelt_deutschen_mittelraum'].mean()
Out[45]:
0.5928571428571429
In [46]:
# Same figure for the canonised-modernism corpus, binarized on the fly.
binarize_meta(meta_modcanon)['behandelt_deutschen_mittelraum'].mean()
Out[46]:
0.2743362831858407
Anthologien¶
In [47]:
def create_anthology_nationalism_plot(
    texts_start = 1850,
    texts_end = 1875,
    only_geschichtslyrik = True,
    anthology_start = 1885,
    min_text_count = 20,
    plot_type = 'bar'
):
    """Plot, per anthology, the share of texts marked as nationalistic.

    Reads the notebook-level `meta` frame. Every anthology with
    `anthology_year_used_ed <= 2000` is considered. For each one, the texts
    with `texts_start <= year <= texts_end` are selected and the share is
    computed as (rows with `nationalismus == 1`) / (rows where `annotated`
    is true).

    Parameters
    ----------
    texts_start, texts_end : int
        Inclusive bounds on the `year` of the texts that are counted.
    only_geschichtslyrik : bool
        If true, only texts with `geschichtslyrik == 1` are counted.
    anthology_start : int
        Anthologies whose year (taken from the first row of the anthology's
        `anthology_year_used_ed` values) is earlier than this are dropped.
    min_text_count : int
        Anthologies with fewer annotated texts than this are dropped.
    plot_type : {'bar', 'bubble'}
        'bar' draws one bar per anthology; 'bubble' draws a scatter plot over
        the anthology year with marker size given by the annotated count.

    Returns
    -------
    plotly figure created by `px.bar` or `px.scatter`.

    Raises
    ------
    ValueError
        If `plot_type` is neither 'bar' nor 'bubble'.
    """
    # Fail early and clearly; previously an unknown plot_type only surfaced
    # as an unbound `fig` at the return statement.
    if plot_type not in ('bar', 'bubble'):
        raise ValueError(f"Unknown plot_type {plot_type!r}; expected 'bar' or 'bubble'.")

    # Collect one row per anthology and build the frame once instead of
    # enlarging a DataFrame cell by cell.
    rows = {}
    for anthology in meta.query("anthology_year_used_ed <= 2000")['anthology'].unique():
        anthology_meta = meta.query("anthology == @anthology")
        anthology_texts_meta = anthology_meta.query("@texts_start <= year <= @texts_end")
        if only_geschichtslyrik:
            anthology_texts_meta = anthology_texts_meta.query("geschichtslyrik == 1")

        rows[anthology] = [
            anthology_meta['anthology_year_used_ed'].tolist()[0],
            anthology_texts_meta.query("annotated").shape[0],
            anthology_texts_meta.query("nationalismus == 1").shape[0],
        ]

    # Anthology names form the (unnamed) index, which px.bar uses as x axis.
    # Float columns match what the former cell-wise construction produced.
    anthology_df = pd.DataFrame.from_dict(
        rows,
        orient = 'index',
        columns = ['anthology_year', 'annotated_count', 'nationalismus_count'],
    ).astype(float)
    anthology_df['nationalismus_share'] = anthology_df['nationalismus_count']/anthology_df['annotated_count']

    meta_plot = anthology_df.query("anthology_year >= @anthology_start and annotated_count >= @min_text_count")

    if plot_type == 'bar':
        fig = px.bar(
            meta_plot,
            y = 'nationalismus_share',
            hover_data = ['annotated_count'],
            labels = {'nationalismus_share' : 'Anteil nationalistische Texte', 'index' : ''}
        )
    else:
        fig = px.scatter(
            meta_plot,
            x = 'anthology_year',
            y = 'nationalismus_share',
            size = 'annotated_count',
            hover_name = meta_plot.index,
            labels = {'nationalismus_share' : 'Anteil nationalistische Texte', 'anthology_year' : 'Publikationsjahr Anthologie'}
        )

    return fig
In [48]:
# Bar chart of the nationalistic share per anthology, restricted to
# Geschichtslyrik texts from 1850–1875 in anthologies from 1885 onwards
# with at least 20 annotated texts. The figure is exported as PDF.
anthology_plot_args = dict(
    texts_start = 1850,
    texts_end = 1875,
    only_geschichtslyrik = True,
    anthology_start = 1885,
    min_text_count = 20,
)
fig = create_anthology_nationalism_plot(**anthology_plot_args)

# Both axes share the same font sizes.
axis_fonts = dict(tickfont=dict(size=16), titlefont=dict(size=16))
fig.update_layout(
    width=900, height=600,
    xaxis=dict(axis_fonts),
    yaxis=dict(axis_fonts),
    legend=dict(font = dict(size=16), traceorder = 'normal'),
    showlegend=False
)
fig.update_xaxes(tickangle=45)

fig = update_fig_for_publication(fig, make_grey=True)
fig.write_image("plots/6.7 Nationalistische Geschichtslyrik nach Anthologien.pdf")
fig.show()
In [49]:
# Same bar chart with a wider selection: texts from 1850–2000 and no
# restriction to Geschichtslyrik. Displayed only, not exported.
anthology_plot_args = dict(
    texts_start = 1850,
    texts_end = 2000,
    only_geschichtslyrik = False,
    anthology_start = 1885,
    min_text_count = 20,
)
fig = create_anthology_nationalism_plot(**anthology_plot_args)

# Both axes share the same font sizes.
axis_fonts = dict(tickfont=dict(size=16), titlefont=dict(size=16))
fig.update_layout(
    width=1200, height=600,
    xaxis=dict(axis_fonts),
    yaxis=dict(axis_fonts),
    legend=dict(font = dict(size=16), traceorder = 'normal'),
    showlegend=False
)
fig.update_xaxes(tickangle=45)
fig.show()
Religiosität¶
In [50]:
# Feature analysed in this section; the following cells read `main_feature`
# for the correlations, the relation tables and the plots.
main_feature = 'religiositaet'
In [51]:
# The 20 numeric features with the highest correlation to the main feature.
corr_with_main = meta_anth_bin.corr(numeric_only=True)[main_feature]
corr_with_main.sort_values(ascending = False).head(20)
Out[51]:
religiositaet 1.000000 religion_positiv 0.458569 liebe_positiv 0.319333 geschichtsauffassung_positiv 0.272853 religion 0.269505 stoffgebiet_positiv 0.193714 tod_positiv 0.188286 politik_positiv 0.182373 sprechinstanz_nicht_in_vergangenheit 0.181845 bekanntes_individuum_positiv 0.181566 sprechakt_behaupten_vorhanden 0.149698 nationalismus 0.143906 gegenwartsbezug 0.141699 entity_positiv 0.137040 ueberlieferung_positiv 0.129081 ende 0.122745 sprechakte_count 0.122678 zeit_mitte 0.121334 beginn 0.117275 nogenre 0.114250 Name: religiositaet, dtype: float64
In [52]:
# The 20 numeric features with the lowest (most negative) correlation to the main feature.
corr_with_main = meta_anth_bin.corr(numeric_only=True)[main_feature]
corr_with_main.sort_values(ascending = True).head(20)
Out[52]:
religion_negativ -0.234767 geschichtsauffassung_negativ -0.182759 entity_neutral -0.153711 stoffgebiet_neutral -0.131207 in_hohem_mass_konkret -0.122761 konkretheit -0.122692 rollengedicht -0.118910 liebe_negativ -0.114708 tod_negativ -0.109694 wissen_ergaenzend -0.107934 antike -0.100265 sprechinstanz_in_vergangenheit -0.094410 politik_negativ -0.093908 year_predict_ages_mean -0.089214 stoffgebiet_negativ -0.082987 unbekanntes_individuum_count -0.075875 decade -0.067600 year -0.067216 nation_volk_d_negativ -0.066667 objektmarker_vorhanden -0.061688 Name: religiositaet, dtype: float64
In [53]:
# Threshold handed to get_features and reused by the result filter further down.
threshold = 0.2

# Compute the correlation matrix once; both feature selections read the same
# column, so recomputing it for each call only cost time.
corr_with_main = meta_anth_bin.corr(numeric_only=True)[main_feature]

# NOTE(review): 'bin' / 'cont' presumably select binary vs. continuous
# features — confirm against get_features in resources_statistics.
bin_comp_features = get_features(corr_with_main, threshold = threshold, mode = 'bin')
cont_comp_features = get_features(corr_with_main, threshold = threshold, mode = 'cont')
In [54]:
# Relate the main feature to each selected binary comparison feature.
# NOTE(review): relations_binbin comes from a project module; judging by the
# columns used below it returns one row per comparison feature with shares,
# differences and chi2/phi statistics — confirm there.
results = relations_binbin(
meta = meta_anth_bin,
main_feature = main_feature,
comp_features = bin_comp_features
)
In [55]:
# Features to exclude by hand; none for this main feature.
directly_related = []

# Keep rows with chi2_p < 0.05, min_expected >= 5 and phi at or above the
# threshold, ordered by descending difference.
significance_filter = "chi2_p < 0.05 and min_expected >= 5 and phi >= @threshold"
results_filtered = results.query("index not in @directly_related")
results_filtered = results_filtered.query(significance_filter)
results_filtered = results_filtered.sort_values(by = 'diff', ascending = False)

results_filtered.round(2)
Out[55]:
| wenn_nicht | wenn_nicht_detail | wenn_ja | wenn_ja_detail | diff_low_bootstrap | diff_low | diff | diff_high | diff_high_bootstrap | chi2 | chi2_p | fisher_p | phi | min_real | min_expected | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| religion_positiv | 0.30 | 47/155 | 0.77 | 86/112 | 0.36 | 0.36 | 0.46 | 0.57 | 0.57 | 56.15 | 0.00 | 0.00 | 0.46 | 26.0 | 55.79 |
| geschichtsauffassung_positiv | 0.27 | 17/62 | 0.60 | 9/15 | 0.06 | 0.05 | 0.33 | 0.60 | 0.58 | 5.73 | 0.02 | 0.03 | 0.27 | 6.0 | 5.06 |
| religion | 0.10 | 155/1532 | 0.35 | 112/318 | 0.20 | 0.20 | 0.25 | 0.31 | 0.31 | 134.37 | 0.00 | 0.00 | 0.27 | 112.0 | 45.90 |
| religion_negativ | 0.16 | 25/155 | 0.02 | 2/112 | -0.21 | -0.21 | -0.14 | -0.08 | -0.08 | 14.72 | 0.00 | 0.00 | 0.23 | 2.0 | 11.33 |
In [56]:
# All remaining rows of `results`, i.e. those that did not pass the filter above.
passed_filter = results.index.isin(results_filtered.index)
results_other = results.loc[~passed_filter]
results_other.sort_values(by='diff', ascending=False).round(2)
Out[56]:
| wenn_nicht | wenn_nicht_detail | wenn_ja | wenn_ja_detail | diff_low_bootstrap | diff_low | diff | diff_high | diff_high_bootstrap | chi2 | chi2_p | fisher_p | phi | min_real | min_expected | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| liebe_positiv | 0.46 | 26/57 | 1.00 | 6/6 | 0.40 | 0.41 | 0.54 | 0.67 | 0.67 | 6.42 | 0.01 | 0.02 | 0.32 | 0.0 | 2.95 |
| bekanntes_individuum_positiv | 0.53 | 896/1689 | 0.76 | 256/336 | 0.18 | 0.18 | 0.23 | 0.28 | 0.28 | 61.19 | 0.00 | 0.00 | 0.17 | 80.0 | 144.85 |
| stoffgebiet_positiv | 0.43 | 891/2088 | 0.65 | 293/452 | 0.17 | 0.17 | 0.22 | 0.27 | 0.27 | 73.26 | 0.00 | 0.00 | 0.17 | 159.0 | 210.70 |
| entity_positiv | 0.45 | 1469/3271 | 0.62 | 402/645 | 0.14 | 0.13 | 0.17 | 0.22 | 0.22 | 65.49 | 0.00 | 0.00 | 0.13 | 243.0 | 308.17 |
| unbekanntes_individuum_positiv | 0.33 | 184/560 | 0.46 | 33/71 | 0.02 | 0.01 | 0.14 | 0.26 | 0.25 | 5.18 | 0.02 | 0.03 | 0.09 | 33.0 | 24.42 |
| kollektiv_positiv | 0.36 | 307/855 | 0.43 | 89/209 | 0.00 | -0.01 | 0.07 | 0.14 | 0.14 | 3.20 | 0.07 | 0.08 | 0.05 | 89.0 | 77.79 |
| kollektiv_negativ | 0.25 | 211/855 | 0.27 | 57/209 | -0.04 | -0.04 | 0.03 | 0.09 | 0.09 | 0.60 | 0.44 | 0.48 | 0.02 | 57.0 | 52.64 |
| stoffgebiet_ambivalent | 0.13 | 275/2088 | 0.14 | 63/452 | -0.03 | -0.03 | 0.01 | 0.04 | 0.04 | 0.19 | 0.66 | 0.65 | 0.01 | 63.0 | 60.15 |
| entity_negativ | 0.17 | 543/3271 | 0.16 | 101/645 | -0.04 | -0.04 | -0.01 | 0.02 | 0.02 | 0.35 | 0.56 | 0.60 | 0.01 | 101.0 | 106.07 |
| entity_ambivalent | 0.06 | 210/3271 | 0.05 | 34/645 | -0.03 | -0.03 | -0.01 | 0.01 | 0.01 | 1.22 | 0.27 | 0.29 | 0.02 | 34.0 | 40.19 |
| unbekanntes_individuum_negativ | 0.13 | 74/560 | 0.10 | 7/71 | -0.10 | -0.11 | -0.03 | 0.04 | 0.05 | 0.63 | 0.43 | 0.57 | 0.03 | 7.0 | 9.11 |
| bekanntes_individuum_negativ | 0.14 | 241/1689 | 0.10 | 35/336 | -0.07 | -0.08 | -0.04 | -0.00 | -0.00 | 3.53 | 0.06 | 0.07 | 0.04 | 35.0 | 45.80 |
| stoffgebiet_negativ | 0.23 | 474/2088 | 0.14 | 62/452 | -0.12 | -0.13 | -0.09 | -0.05 | -0.05 | 18.01 | 0.00 | 0.00 | 0.08 | 62.0 | 95.38 |
| stoffgebiet_neutral | 0.21 | 448/2088 | 0.08 | 34/452 | -0.17 | -0.17 | -0.14 | -0.11 | -0.11 | 46.92 | 0.00 | 0.00 | 0.14 | 34.0 | 85.77 |
| entity_neutral | 0.32 | 1049/3271 | 0.17 | 108/645 | -0.19 | -0.19 | -0.15 | -0.12 | -0.12 | 60.79 | 0.00 | 0.00 | 0.12 | 108.0 | 190.57 |
In [57]:
# Compare the filtered relations between two periods of text years
# (1850–1884 vs. 1885–1918) and show how diff and phi change.
result_categories = ['wenn_nicht', 'wenn_nicht_detail', 'wenn_ja', 'wenn_ja_detail', 'diff', 'chi2_p', 'phi',]

period_queries = ("1850 <= year <= 1884", "1885 <= year <= 1918")
results_a, results_b = (
    relations_binbin(
        meta = meta_anth_bin.query(period_query),
        main_feature = main_feature,
        comp_features = results_filtered.index
    )
    for period_query in period_queries
)

# Side-by-side table; the suffixes name the first year of each period.
results_merged = (
    results_a[result_categories]
    .join(results_b[result_categories], lsuffix='_1850', rsuffix = '_1885')
    .assign(
        diff_of_diffs = lambda merged: merged['diff_1885'] - merged['diff_1850'],
        diff_of_phis = lambda merged: merged['phi_1885'] - merged['phi_1850'],
    )
)
results_merged.sort_values(by = 'diff_of_phis').round(3)
Out[57]:
| wenn_nicht_1850 | wenn_nicht_detail_1850 | wenn_ja_1850 | wenn_ja_detail_1850 | diff_1850 | chi2_p_1850 | phi_1850 | wenn_nicht_1885 | wenn_nicht_detail_1885 | wenn_ja_1885 | wenn_ja_detail_1885 | diff_1885 | chi2_p_1885 | phi_1885 | diff_of_diffs | diff_of_phis | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| religion | 0.115 | 126/1099 | 0.401 | 99/247 | 0.286 | 0.000 | 0.297 | 0.067 | 29/433 | 0.183 | 13/71 | 0.116 | 0.001 | 0.146 | -0.170 | -0.151 |
| geschichtsauffassung_positiv | 0.234 | 11/47 | 0.600 | 6/10 | 0.366 | 0.022 | 0.304 | 0.400 | 6/15 | 0.600 | 3/5 | 0.200 | 0.436 | 0.174 | -0.166 | -0.130 |
| religion_negativ | 0.175 | 22/126 | 0.020 | 2/99 | -0.154 | 0.000 | 0.248 | 0.103 | 3/29 | 0.000 | 0/13 | -0.103 | 0.229 | 0.186 | 0.051 | -0.063 |
| religion_positiv | 0.333 | 42/126 | 0.758 | 75/99 | 0.424 | 0.000 | 0.422 | 0.172 | 5/29 | 0.846 | 11/13 | 0.674 | 0.000 | 0.641 | 0.249 | 0.220 |
In [58]:
# Relate the main feature to each selected continuous comparison feature.
# Overwrites the `results` table of the binary comparison above.
# NOTE(review): relations_bincont comes from a project module; its output
# columns are not visible here — confirm there.
results = relations_bincont(
meta = meta_anth_bin,
main_feature = main_feature,
comp_features = cont_comp_features
)
In [59]:
# Display the relations_bincont result assigned to `results` in the previous cell.
results
Out[59]:
In [60]:
# One grouped histogram per continuous comparison feature, contrasting texts
# with and without the main feature; group means appear in the legend.
meta_plot = meta_anth_bin.copy()

for cont_comp_feature in cont_comp_features:
    has_main = meta_plot[main_feature] == 1
    lacks_main = meta_plot[main_feature] == 0
    mean_main = meta_plot.loc[has_main, cont_comp_feature].mean()
    mean_notmain = meta_plot.loc[lacks_main, cont_comp_feature].mean()

    label_main = f"Religiöse Texte<br>(Mittelwert = {round(mean_main, 2)})"
    label_notmain = f"Nicht religiöse Texte<br>(Mittelwert = {round(mean_notmain, 2)})"

    # Every row whose main feature is not 1 gets the "not" label.
    meta_plot['plot_legend'] = has_main.map({True: label_main, False: label_notmain})

    histogram_labels = {'plot_legend' : '', 'stoffgebiet_positiv' : 'Anzahl positiv bewertete Stoffgebiete',}
    fig = px.histogram(
        meta_plot,
        x = cont_comp_feature,
        color = 'plot_legend',
        histnorm = 'probability density',
        barmode = 'group',
        labels = histogram_labels
    )

    axis_fonts = dict(tickfont=dict(size=16), titlefont=dict(size=16))
    fig.update_layout(
        width = 700, height = 300,
        yaxis_title="Anteil",
        xaxis=dict(axis_fonts),
        yaxis=dict(axis_fonts),
        legend=dict(font = dict(size=16), x=0.6, y = 0.95),
        bargap=0.1
    )
    # fig.write_image(f"plots/6.7 Religiosität – {cont_comp_feature}.pdf")
    fig.show()